home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The PC-SIG Library 9
/
The PC-SIG Library on CD ROM - Ninth Edition.iso
/
401_500
/
DISK0420
/
DISK0420.ZIP
/
FCOMPARE.C
< prev
next >
Wrap
Text File
|
1985-05-02
|
13KB
|
415 lines
/**********************************************************************
Copyright (c) 1985, Charles Daney
All Rights Reserved
The FCOMPARE program implements Paul Heckel's algorithm from the
Communications of the ACM, April 1978, for detecting the differences
between two files. This algorithm has the advantage over more commonly
used compare algorithms that it is fast and can detect differences of an
arbitrary number of lines.
A full statement of terms of use and disclaimer of warranty appears at
the end of this file.
The source code has been written for the CI C86 compiler. It uses
declarations of 'unsigned long' and 'unsigned char', so a prerequisite
to using any other compiler is support for these types.
Modification history.
Author Date Purpose
Charles Daney 4/10/85 original release
Author's address:
Charles Daney
Quercus Systems
19567 Dorchester Drive
Saratoga, CA 95070
(408) 257-8440
**********************************************************************/
#include "stdio.h"
#define MAXLINES 5000
#define FULL 0x80
#define TABS 0x40
#define TRIM 0x20
#define ON 1
#define OFF 0
long hash1[MAXLINES], hash2[MAXLINES];
unsigned char occ1[8192], occ2[8192];
int n1, n2;
FILE *file1, *file2;
unsigned char flag1;
struct {
char *optname;
char min;
char *flag;
unsigned char bit;
unsigned char onoff; } optable[] = {
{ "FULL", 1, &flag1, FULL, ON },
{ "BRIEF", 1, &flag1, FULL, OFF },
{ "TABS", 1, &flag1, TABS, ON },
{ "NOTABS", 3, &flag1, TABS, OFF },
{ "TRIM", 2, &flag1, TRIM, ON },
{ "NOTRIM", 4, &flag1, TRIM, OFF },
{ 0, 0, 0, 0, 0 } };
/* main program */
main(argc,argv)
int argc;
char *argv[];
{
int i, j, k;
unsigned char linked;
puts("V1.1 Copyright (c) 1985, Charles Daney\n");
flag1 = 0;
if (argc<3) {
printf("Format is: FCOMPARE filespec1 filespec2 [options]\n");
printf(" options:\n");
printf(" Full - show full lines.\n");
printf(" Brief - show first 34 characters of lines.\n");
printf(" Tabs - expand tabs before comparing.\n");
printf(" NOTabs - don't expand tabs.\n");
printf(" TRim - ignore trailing blanks.\n");
printf(" NOTRim - don't ignore trailing blanks.\n");
return;
}
/* examine possible options */
for (i=3; i<argc; i++) {
for (j=0; optable[j].optname; j++) {
if (match(argv[i],optable[j].optname,optable[j].min)) {
if (optable[j].onoff==ON)
*optable[j].flag |= optable[j].bit;
else *optable[j].flag &= ~optable[j].bit;
break;
}
}
if (optable[j].optname==0) {
printf("Invalid parameter: '%s'.\n",argv[i]);
return;
}
}
file1 = fopen(argv[1],"r");
if (file1==0) {
printf("Unable to open file '%s'.\n",argv[1]);
return;
}
file2 = fopen(argv[2],"r");
if (file2==0) {
printf("Unable to open file '%s'.\n",argv[2]);
return;
}
/* step 1: read first file and hash it */
printf("Reading file '%s'.\n",argv[1]);
n1 = input(file1,hash1,occ1);
fseek(file1,0L,0);
/* step 2: read second file and hash it */
printf("Reading file '%s'.\n",argv[2]);
n2 = input(file2,hash2,occ2);
fseek(file2,0L,0);
/* step 3: identify lines that are unique in both files */
for (i=0; i<8192; i++) occ1[i] &= occ2[i];
/* step 4: link together matching unique lines */
for (i=0; i<n1; i++) {
if (getbits(occ1,-hash1[i])!=1) continue;
for (j=0; i+j<n2 || i-j>=0; j++) {
if (i+j<n2) if (hash2[i+j]==hash1[i]) {
hash1[i] = i+j;
hash2[i+j] = i;
break;
}
if (i-j>=0) if (hash2[i-j]==hash1[i]) {
hash1[i] = i-j;
hash2[i-j] = i;
break;
}
}
}
/* step 5: link the first and last lines, if possible */
if (hash1[0]<0 && hash1[0]==hash2[0]) hash1[0] = hash2[0] = 0;
if (hash1[n1-1]<0 && hash1[n1-1]==hash2[n2-1]) {
hash1[n1-1] = n2-1;
hash2[n2-1] = n1-1;
}
/* step 6: starting from linked lines, link following lines that match */
linked = 0;
for (i=0; i<n1; i++) {
if (hash1[i]>=0) linked = 1;
else if (linked==1) {
if (hash1[i]==hash2[hash1[i-1]+1]) {
hash1[i] = hash1[i-1]+1;
hash2[hash1[i]] = i;
}
else linked = 0;
}
}
/* step 7: link matching lines that precede linked lines */
linked = 0;
for (i=n1-1; i>=0; i--) {
if (hash1[i]>=0) linked = 1;
else if (linked==1) {
if (hash1[i]==hash2[hash1[i+1]-1]) {
hash1[i] = hash1[i+1] - 1;
hash2[hash1[i]] = i;
}
else linked = 0;
}
}
/* for (i=0; i<n1 || i<n2; i++) printf("%12ld %12ld\n",hash1[i],hash2[i]);*/
/* step 8: display the results */
for (i=j=0; i<n1 && j<n2;) {
if (hash1[i]<j && hash2[j]<i) {
output(i++,j++);
continue;
}
if (hash1[i]<j) {
output(i++,-1);
continue;
}
if (hash2[j]<i) {
output(-1,j++);
continue;
}
if (hash1[i]==j) {
for (k=1; i+k<=n1 && j+k<=n2 && hash1[i+k]==j+k; k++);
printf("\n*** %d line(s) match. ***\n\n",k);
i += k;
j += k;
continue;
}
if (hash1[i]-j <= hash2[j]-i) {
for (k=j; k<hash1[i]; k++) output(-1,k);
j = hash1[i];
continue;
}
else {
for (k=i; k<hash2[j]; k++) output(k,-1);
i = hash2[j];
continue;
}
}
if (i<n1) for (k=i; k<n1; k++) output(k,-1);
if (j<n2) for (k=j; k<n2; k++) output(-1,k);
fclose(file1);
fclose(file2);
}
/* read in file, build hash & occurrence tables */
input(file,hashvect,occ)
int file;
long hashvect[];
unsigned char occ[];
{
int i, j;
long h;
unsigned char bits, buffer[256], temp[256];
long hash();
for (i=0; i<MAXLINES; i++) {
if ((flag1&TABS)==0) {
if (fgets(buffer,256,file)==0) return(i);
}
else {
if (fgets(temp,256,file)==0) return(i);
tabex(temp,buffer);
}
if (flag1&TRIM) {
for (j=0; j<256 && j>=0 && buffer[j] && buffer[j]!='\n'; j++);
for (j=j-1; j>=0 && buffer[j]==' ' ; j--);
buffer[j+1] = 0;
}
h = hash(buffer);
if (h<0) hashvect[i] = h; else hashvect[i] = -h;
bits = getbits(occ,-hashvect[i]);
if (bits==0) setbits(occ,-hashvect[i],1);
else if (bits==1) setbits(occ,-hashvect[i],2);
}
printf("File truncated at %d lines.\n",MAXLINES);
return(i-1);
}
/* hash a character string */
long hash(s)
unsigned char *s;
{
long h=0, h1;
while (*s) {
h1 = h;
h = h<<1;
if (h1<0) h |= 1;
h ^= *s++;
}
if (h==0) h = 1;
return(h);
}
/* extract bits from the occurrence vector */
getbits(array,indx)
unsigned char *array;
unsigned long indx;
{
unsigned i, j;
indx &= 32767;
i = indx>>2;
j = indx - (i<<2);
return(array[i]>>((3-j)<<1) & 0x03);
}
/* store bits in the occurrence array */
setbits(array,indx,x)
unsigned char *array;
unsigned long indx;
unsigned char x;
{
unsigned i, j, shift;
indx &= 32767;
i = indx>>2;
j = indx - (i<<2);
shift = (3-j)<<1;
array[i] &= ~(0x03<<shift);
array[i] |= x<<shift;
}
/* display the results of comparison */
output(l1,l2)
int l1, l2;
{
static int cl1 = 0, cl2 = 0;
char line[81];
unsigned int bi1=0, bi2=0;
unsigned char end1=1, end2=1;
int i;
char buffer1[256], buffer2[256], temp[256];
for (i=0; i<80; i++) line[i] = ' ';
line[80] = 0;
if (l1>=0) {
for (i=cl1; i<=l1; i++)
if((flag1&TABS)==0) fgets(buffer1,256,file1);
else {
fgets(temp,256,file1);
tabex(temp,buffer1);
}
cl1 = l1 + 1;
sprintf(line,"%4d ",l1+1);
line[5] = ' ';
for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
line[i+5] = buffer1[i+bi1];
if (i==34) {
bi1 += 34;
end1 = 0;
}
}
if (l2>=0) {
for (i=cl2; i<=l2; i++)
if((flag1&TABS)==0) fgets(buffer2,256,file2);
else {
fgets(temp,256,file2);
tabex(temp,buffer2);
}
cl2 = l2 + 1;
sprintf(line+40,"%4d ",l2+1);
line[45] = ' ';
for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
line[i+45] = buffer2[i+bi2];
if (i==34) {
bi2 += 34;
end2 = 0;
}
}
line[45+i] = '\n';
line[46+i] = 0;
fwrite(line,1,46+i,stdout);
if (flag1 & FULL) while (!end1 || !end2) {
for (i=0; i<80; i++) line[i] = ' ';
if (!end1) {
for (i=0; buffer1[i+bi1] && buffer1[i+bi1]!='\n' && i<34; i++)
line[i+5] = buffer1[i+bi1];
if (i==34) bi1 += 34; else end1 = 1;
}
if (!end2) {
for (i=0; buffer2[i+bi2] && buffer2[i+bi2]!='\n' && i<34; i++)
line[i+45] = buffer2[i+bi2];
if (i==34) bi2 += 34; else end2 = 1;
}
line[45+i] = '\n';
line[46+i] = 0;
fwrite(line,1,46+i,stdout);
}
}
/* match strings with specified minimum */
match(s1,s2,min)
unsigned char *s1, *s2, min;
{
unsigned int i;
for (i=0; *s1 && *s2; i++) if (toupper(*s1++) != *s2++) return(0);
if (*s1==0) return(i>=min);
return(0);
}
/* expand tabs */
tabex(s1,s2)
unsigned char *s1, *s2;
{
int i;
unsigned char j;
for (i=j=0; s1[i]; i++) {
if (s1[i] != 0x09) {
s2[j++] = s1[i];
continue;
}
do s2[j++] = ' '; while(j%8 != 0);
}
s2[j] = 0;
}
/**********************************************************************
Permission is granted to freely distribute this code, but not for
profit, provided that this notice and the following disclaimer are
included in their entirety and without modifications of any sort. This
work may not be sold (except for a nominal media charge), or included
in any other work to be sold, without the written permission of the
author.
If you modify this code, please include your name and a description of
the modification in the change history at the top of this file.
Modifications should be clearly indicated as to location and purpose,
with descriptive comments that clearly identify altered lines. The
author would appreciate hearing of any modifications that may be made.
Disclaimer:
This program is provided "as-is" without warranty of any kind, either
expressed or implied, including, but not limited to the implied
warranties of merchantability and fitness for a particular purpose. The
entire risk as to the results and performance of the program is assumed
by the user. Should the program prove defective, you (and not the
author) assume the entire cost of all necessary servicing, repair, or
correction.
Neither the author nor anyone else who has been involved in the
creation, production or delivery of this program shall be liable for any
direct, indirect, consequential or incidental damages arising out of the
use or inability to use this program.
**********************************************************************/